

###########################################################
##### Haiti elite network project  		          			#####
##### create network vars - older networks versions	  #####
##### 2021 mar 03                   									#####
###########################################################


## 1. collapse network data to fam level
## 2. merge in family characteristics
## 3. make igraph object
## 4. calculate network stats


## Upper cohort bounds to process: the loop body below is run once per value,
## and the outputs it writes carry the period in their file names.
periods <- c(1950, 1925)

## NOTE: bare expressions inside a `for` body are evaluated but never
## auto-printed, so any diagnostic meant to be seen needs an explicit print().
for (z in periods) {
  
  ## set period
  ## (`period` is the name the rest of the loop body reads)
  period <- z
  
  
  #####
  ## prepare family network data
  #####
  
  ##
  ## read network data into graph form
  ##
  
  ## read in the "ps" graph (edges carry `parent` and `spouse` attributes,
  ## so presumably parent/spouse ties between individuals -- confirm)
  ps_graph <- read.graph('01_Data/02_Clean/gene_ps_all.graphml', format = 'graphml')
  
  
  ##
  ## subset to only the right group of the network
  ##
  
  ps_mat <- get.data.frame(ps_graph, what = 'edges')      # edge list + edge attributes
  ps_att <- get.data.frame(ps_graph, what = 'vertices')   # one row per vertex
  
  
  ## subset to the right time period
  ##
  ## The window always starts at 1850 and ends at `period`. The upper bound is
  ## derived from `period` instead of being copy-pasted per branch, so the
  ## result is identical for 1950 and 1925 and any other 25-year boundary works.
  ##
  ## `cohort25` holds interval labels such as "(1900,1925]"; they are compared
  ## as strings, which orders correctly only while every label uses the same
  ## four-digit "(yyyy,yyyy]" layout.
  first_cohort25 <- "(1850,1875]"
  last_cohort25 <- paste0("(", period - 25, ",", period, "]")
  
  ## which() drops rows where the condition is NA, exactly like subset() does
  keep_vertices <- which(ps_att$cohort >= 1850 & ps_att$cohort <= period)
  ps_att <- ps_att[keep_vertices, , drop = FALSE]
  
  keep_edges <- which(ps_mat$cohort25 >= first_cohort25 &
                        ps_mat$cohort25 <= last_cohort25)
  ps_mat <- ps_mat[keep_edges, , drop = FALSE]
  
  
  ##
  ## collapse to fam
  ##
  
  # attach ego (the `from` person) attributes to every edge
  ego_edge_cols <- c("from", "to", "cohort15", "cohort25", "cohort10", "parent", "spouse")
  fam_mat <- ps_mat[, ego_edge_cols, drop = FALSE]
  fam_mat <- merge(fam_mat, ps_att, by.x = "from", by.y = "name", all.x = TRUE)
  
  # keep only edges with a non-zero spouse flag (rows with NA flag are dropped)
  is_spouse_edge <- fam_mat$spouse != 0
  fam_mat <- fam_mat[which(is_spouse_edge), c("from", "to", "family", "cohort"), drop = FALSE]
  names(fam_mat) <- c("from", "to", "family_e", "cohort_e")
  
  # attach alter (the `to` person) attributes, then keep family/cohort of both ends
  fam_mat <- merge(fam_mat, ps_att, by.x = "to", by.y = "name", all.x = TRUE)
  fam_mat <- fam_mat[, c("family_e", "cohort_e", "family", "cohort"), drop = FALSE]
  names(fam_mat) <- c("family_e", "cohort_e", "family_a", "cohort_a")
  
  # drop ties where either side has the literal family name "NA"
  # (a missing family name makes the comparison NA, which is dropped as well)
  both_named <- fam_mat$family_a != "NA" & fam_mat$family_e != "NA"
  fam_mat <- fam_mat[which(both_named), , drop = FALSE]
  
  ## one row per distinct family name found among the retained individuals
  fam_att <- data.frame(family = unique(ps_att$family))
  
  
  #####
  ## merge family characteristics into network data
  #####
  
  ##
  ## merge biz info into gen
  ##
  
  own <- read.dta("01_Data/02_Clean/fam_biz_prod.dta")
  
  ## Drop rows without a usable family key. The is.na() guard matters: indexing
  ## with `own$fam != "NA"` alone yields NA for a truly missing key, and an NA
  ## row index produces an all-NA row that would later be grouped and flagged
  ## as a business-owning family.
  own <- data.table(own[!is.na(own$fam) & own$fam != "NA", ])
  
  ## sum every value measure within family
  own <- own[, lapply(.SD, sum, na.rm = TRUE), 
             .SDcols = c('value_1', 'value_wo_02', 'value_ha_02', 'value_wo_0212', 
                         'value_wo_autoc', 'value_log', 'value_log0212', 'value_logautoc',
                         'value_bin', 'value_bin0212', 'value_binautoc'),
             by = 'fam']
  own$biz <- 1   # marker: family appears in the business data
  
  
  ## merge in to fam network data
  
  fam_att <- merge(fam_att, own, by.x = "family", by.y = 'fam', all.x = TRUE)
  
  ## Interactive diagnostics. NOTE: inside a `for` loop these bare expressions
  ## are evaluated but not printed; run them by hand or wrap them in print().
  table(na.omit(fam_att$biz))/dim(fam_att)[1]   # proportion of elite fams who are biz-owners
  own$fam[!(own$fam %in% fam_att$family)]   # biz owning fams who are not in gen data
  own$fam[(own$fam %in% fam_att$family)]   # biz owning fams who are in gen data
  length(own$fam[(own$fam %in% fam_att$family)])/length(own$fam)   # prop of biz owners who are matched in gen data
  
  
  
  ##
  ## merge political variables into family gen data
  ##
  
  ## supplice pol bios
  pol <- read.csv("01_Data/02_Clean/polbios_fam.csv", as.is = TRUE)
  pol$X <- NULL   # drop the row-name column left by an earlier write.csv, if present
  
  fam_att <- merge(fam_att, pol, by.x = "family", by.y = "name_last", all.x = TRUE)
  
  ## make dummy if at least one fam member served in legis, exec, or judic
  ##
  ## The political columns are taken as the contiguous run from `legis` to
  ## `pol_duvpre1`, so this depends on the column order of the csv.
  ## Each column is recoded once; the previous loop ignored its index and
  ## re-recoded the whole block on every pass (same result, redundant work).
  ## NA (family absent from the csv) stays NA here.
  first_pol <- which(colnames(fam_att) == "legis")
  last_pol <- which(colnames(fam_att) == "pol_duvpre1")
  vars <- colnames(fam_att)[first_pol:last_pol]
  for (i in vars) {
    fam_att[[i]] <- ifelse(fam_att[[i]] >= 1, 1, 0)
  }
  
  
  ##
  ## add other measures
  ##
  
  ## OFAC coups
  coup <- read.csv("01_Data/02_Clean/coup_fams.csv", as.is = TRUE)
  coup$X <- NULL
  setnames(coup, c('last_name', 'coup_bi'), c('family', 'coup'))
  fam_att <- merge(fam_att, coup, by = "family", all.x = TRUE)
  
  ## immigration
  book <- read.csv("01_Data/02_Clean/all_immig.csv")
  book$X <- NULL
  fam_att <- merge(fam_att, book, by.x = "family", by.y = "fam", all.x = TRUE)
  
  ## NA -> 0
  ##
  ## Applies to the contiguous run of columns from `biz` to `african`
  ## (depends on merge order above). Recode column by column: apply() on a
  ## data.frame first coerces it to a matrix, so a single non-numeric column
  ## in the range would silently turn every recoded column into character.
  ## `fam_att[vars]` also keeps working when the range is a single column.
  first_flag <- which(colnames(fam_att) == "biz")
  last_flag <- which(colnames(fam_att) == "african")
  vars <- colnames(fam_att)[first_flag:last_flag]
  fam_att[vars] <- lapply(fam_att[vars], function(x) car::recode(x, "NA=0"))
  
  
  ##
  ## final steps
  ##
  
  ## drop duplicated families, keeping the first row of each
  ## (duplicates can only come from a merge source with several rows per family)
  fam_att <- fam_att[!duplicated(fam_att$family), , drop = FALSE]
  
  ## create a value with zeros for non-econ elites
  fam_att$value_2 <- ifelse(is.na(fam_att$value_1), 0, fam_att$value_1)
  fam_att$value_log_nona2 <- log(fam_att$value_2 + 1)
  
  ## <col>_nona = <col> with NA replaced by 0; the order below fixes the
  ## column order of the new variables
  nona_sources <- c('value_wo_02', 'value_ha_02', 'value_wo_0212', 'value_wo_autoc',
                    'value_log', 'value_log0212', 'value_logautoc',
                    'value_bin', 'value_bin0212', 'value_binautoc')
  for (value_col in nona_sources) {
    fam_att[[paste0(value_col, '_nona')]] <-
      ifelse(is.na(fam_att[[value_col]]), 0, fam_att[[value_col]])
  }
  
  ## scale value to fall between 0 and 1
  ## (yields NaN for every family if all values are equal, e.g. all zero)
  value_range <- range(fam_att$value_log_nona)
  fam_att$value_log_nona_scaled <- (fam_att$value_log_nona - value_range[1])/(value_range[2] - value_range[1])
  fam_att$value_log_nona_rank <- rank(fam_att$value_log_nona)
  fam_att$value_log_nona_bin <- car::recode(fam_att$value_log_nona, "0=0;NA=NA;else=1")
  
  ## strictly positive version: everything in [0, m] is lifted to m, the
  ## smallest positive value. Done with plain indexing rather than
  ## car::recode(x, "0:m=m"), which only works because recode() happens to find
  ## `m` in the global environment when it evaluates the recode string.
  m <- min(fam_att$value_log_nona[fam_att$value_log_nona>0], na.rm = TRUE)
  value_pos <- fam_att$value_log_nona
  value_pos[value_pos >= 0 & value_pos <= m] <- m
  fam_att$value_log_nona_pos10 <- value_pos
  
  
  #####
  ## set up graph
  #####
  
  ## edge list at family level: one row per spouse tie, ego family -> alter family
  fam_mat <- cbind.data.frame('ego' = fam_mat$family_e, 'alter' = fam_mat$family_a, 
                              'spouse_tie' = rep(1, nrow(fam_mat)), 'cohort' = fam_mat$cohort_e)
  
  ## make into graph object
  ## (the first column of fam_att, `family`, supplies the vertex names; the
  ## remaining columns become vertex attributes)
  fam_graph <- graph.data.frame(fam_mat, vertices = fam_att, directed = TRUE)
  
  
  # make undirected: parallel/mutual ties collapse into one edge that keeps
  # the attributes of the first of the merged edges
  fam_graph <- as.undirected(fam_graph, mode = 'collapse', edge.attr.comb = 'first')
  
  
  ## write fam graph to a period-specific graphml file
  ## (path is built from `period`, so the 1950 and 1925 files keep their names
  ## and any additional period gets its own file instead of being skipped)
  write.graph(fam_graph,
              paste0('01_Data/02_Clean/fam_graph_', period, '.graphml'),
              format = 'graphml')
  
  
  #####
  ## calculate network stats
  #####
  
  ##
  ## write functions for Bonacich centrality
  ##
  
  ## calculate bonacich centrality (with optional weights by nodes)
  ##   adj   : square adjacency matrix
  ##   beta  : scalar multiplying adj
  ##   nodew : node weights, a scalar or one value per row of adj (default 1)
  ## returns the n x 1 solution x of (I - beta * adj) x = nodew.
  ## The system is solved directly instead of forming the inverse and then
  ## multiplying, which is cheaper and numerically better behaved.
  bonpow2 <- function(adj, beta, nodew = 1){
    n <- nrow(adj)
    solve(diag(n) - beta * adj, matrix(nodew, n, 1))
  }
  
  ## calculate M matrix: M = (I - beta * adj)^{-1}
  ##   adj  : square adjacency matrix
  ##   beta : scalar multiplying adj
  makeMmatrix <- function(adj, beta){
    n_nodes <- nrow(adj)
    identity_mat <- diag(n_nodes)
    solve(identity_mat - beta * adj)
  }
  
  ## calculate bonpow centrality from a precomputed M matrix
  ##   M     : matrix as returned by makeMmatrix()
  ##   nodew : node weights, a scalar or one value per row of M (default 1)
  ## returns M times the weights laid out as a one-column matrix
  bonpowM <- function(M, nodew = 1){
    weight_col <- matrix(nodew, nrow = nrow(M), ncol = 1)
    M %*% weight_col
  }
  
  
  
  #####
  ## number of individuals in time period by family
  #####
  
  ## Count rows of ps_att per family name. Counting through a factor/table
  ## avoids three problems of a row-by-row `==` loop: rows with a missing
  ## family being counted into every family, `$fam` relying on partial
  ## matching of the `family` column, and 1:length() misbehaving on empty input.
  fams <- na.omit(unique(ps_att$family))
  fam_counts <- table(factor(ps_att$family, levels = fams))
  nind <- data.frame('fam' = as.character(fams),
                     'nind' = as.vector(fam_counts),
                     stringsAsFactors = FALSE)
  
  
  ## weighted bonacich taking family size into account
  
  nind <- nind[order(nind$fam),]
  
  ## vertex attribute: look the size up by vertex name rather than assuming
  ## the graph's vertex order equals the sorted order of `nind`
  vertex_fams <- V(fam_graph)$name
  fam_graph <- set.vertex.attribute(fam_graph, 'nind', index = V(fam_graph),
                                    nind$nind[match(vertex_fams, nind$fam)])
  
  ## edge weights: 1 / sqrt(size of one end * size of the other end).
  ## Sizes are looked up with match() so that row k of fam_edge still describes
  ## edge k of the graph; merge() re-sorts its result, which would attach the
  ## weights to the wrong edges when they are written back by position.
  fam_edge <- get.edgelist(fam_graph)
  fam_edge <- data.frame('fam_e' = fam_edge[,1], 'fam_a' = fam_edge[,2],
                         stringsAsFactors = FALSE)
  fam_edge$nind_e <- nind$nind[match(fam_edge$fam_e, nind$fam)]
  fam_edge$nind_a <- nind$nind[match(fam_edge$fam_a, nind$fam)]
  fam_edge$nind_w <- 1/(fam_edge$nind_e*fam_edge$nind_a)^(1/2)
  
  fam_graph <- set.edge.attribute(fam_graph, 'nind_w', index = E(fam_graph), fam_edge$nind_w)
  
  ## export adjacency matrix with weights
  ## (cell values are taken from the edge attribute `nind_w`; the vertex table
  ## is pulled from the same graph so its row order matches the matrix)
  fam_adj_wnind <- get.adjacency(fam_graph, attr='nind_w')
  fam_att_wnind <- get.data.frame(fam_graph, what=c('vertices'))
  
  ## get adjacency and take max eigenvalue
  ## `e` is used below to express beta as a fraction of 1/e
  eigens <- eigen(fam_adj_wnind)
  e <- max(eigens$values)
  ## NOTE(review): evaluated but not printed inside the `for` loop -- wrap in
  ## print() if the smallest eigenvalue is meant to be inspected
  min(eigens$values)
  
  
  ##
  ## value and family size-weighted measures
  ##
  
  
  ## calculate nind-weighted centrality
  ## M = (I - beta * A)^{-1} with beta set to 0.2 / e
  M_wnind_02 <- makeMmatrix(adj = fam_adj_wnind, beta = 0.2*(1/e))
  
  ## centrality = M %*% node weights, the weights being each family's
  ## `value_log_nona` vertex attribute
  bonw_02_wnind <- bonpowM(M = M_wnind_02, nodew = fam_att_wnind$value_log_nona)
  
  ## one row per family: vertex name plus its centrality score
  fam_cent <- cbind.data.frame("family" = fam_att_wnind$name,
                               'bonw_02_wnind' = matrix(bonw_02_wnind))
  
  
  
  #####
  ## merge it together
  #####
  
  ## suffix every measure column (all but the first, `family`) with the
  ## period, e.g. bonw_02_wnind.1950
  measure_cols <- colnames(fam_cent)[-1]
  setnames(fam_cent, measure_cols, paste0(measure_cols, '.', period))
  
  
  ## write one csv per period; the path is built from `period`, so the 1925
  ## and 1950 files keep their names and any other period is written too
  ## instead of being silently skipped
  write.csv(fam_cent, paste0('01_Data/02_Clean/centrality_', period, '.csv'))
  
}





